# load("xxx.RData")



### PROJECT LEVEL VARIABLES
  
# A) Production Function Category
#
# Derives two project-level objects from the fitted object `prodfn`, which is
# created outside this section. `where()` and `Predict()` are not defined in
# this section -- presumably they come from the modelling package used to fit
# `prodfn`; TODO confirm which package must be loaded before running this.

# Discretized production inputs:
# (the result of where(prodfn), stored as a factor; it is named with
# rownames(articles) further down, so it is expected to have one element per
# row of `articles`)

X_I <- as.factor( where(prodfn) )

# Predicted impact:
# (the result of Predict(prodfn); also named with rownames(articles) further
# down, so the same one-element-per-article expectation applies)

Y <- Predict(prodfn)
###

# B) Project Level Costs
#
# Builds the cost category X_C: the number of authors of each article,
# censored at 4 and stored as a factor.
#
# The column is read explicitly from `articles` instead of via
# attach()/detach(): with attach(), a bare `num_authors` resolves to any
# same-named object in the workspace before the data frame column, and an
# error between attach() and detach() leaves `articles` on the search path.

# Fail fast if the column is missing ([[ ]] would silently return NULL):

stopifnot("num_authors" %in% names(articles))

# Censor number of authors at 4:

num_authors_trunc <- pmin(articles[["num_authors"]], 4)
names(num_authors_trunc) <- rownames(articles)

# Common cost factor is number of authors:

X_C <- as.factor(num_authors_trunc)

# Check (auto-prints only when run interactively or sourced with echo):

nlevels(X_C)
levels(X_C)

# Drop empty levels:
# (kept from the original as a safeguard; as.factor() on a numeric vector
# should already create levels only for values that occur)

X_C <- droplevels(X_C)

###

# C) Project Category
#
# Crosses the cost category (X_C) with the production category (X_I) to get
# one project category per article, then labels all project-level vectors
# with the article row names.

# `:` applied to two factors gives their interaction, with one level for
# every (cost, production) pair:

X_P <- X_C:as.factor(X_I)

# Level count before pruning (includes pairs that never occur):

nlevels(X_P)

# Keep only the combinations actually observed:

X_P <- droplevels(X_P)

# Category counts after pruning:

nlevels(X_C)
nlevels(X_P)

# Label each vector with the row names of `articles`:

names(X_C) <- rownames(articles)
names(X_I) <- rownames(articles)
X_P <- setNames(X_P, rownames(articles))
Y <- setNames(Y, rownames(articles))

### 

# D) Topical Area
#
# Assigns each article a three-character topic code, returned as a factor
# named by the row names of `articles`. An article gets a topic's code when
# its value in the matching column exceeds 0.5; articles that exceed 0.5 in
# no column keep the placeholder "000".
# NOTE(review): the five columns are presumably topic shares/probabilities
# in [0, 1] -- confirm against wherever `articles` is built.
#
# Columns are read explicitly from `articles` instead of via
# attach()/detach(): with attach(), a bare column name resolves to any
# same-named object in the workspace first, and an error between attach()
# and detach() leaves `articles` on the search path.

# Fail fast if a column is missing ([[ ]] would silently return NULL and the
# corresponding assignment below would then do nothing):

stopifnot(
  all(c("apmic", "methy", "macro", "bzfin", "agloc") %in% names(articles))
)

# Storage:

topic_code <- rep("000", nrow(articles))
names(topic_code) <- rownames(articles)

# Assign values:
# Order matters: if an article exceeds 0.5 in more than one column, the last
# matching line wins. Rows where the column is NA are left unchanged, because
# NA entries in a logical index are skipped when the replacement has length 1.

topic_code[articles[["apmic"]] > 0.5] <- "apm"
topic_code[articles[["methy"]] > 0.5] <- "mty"
topic_code[articles[["macro"]] > 0.5] <- "mac"
topic_code[articles[["bzfin"]] > 0.5] <- "bfn"
topic_code[articles[["agloc"]] > 0.5] <- "alo"

# Reformat:

topic_code <- as.factor(topic_code)
